Completed
Push — master ( f0654e...3057e3 )
by Elbert
01:05
created

wappalyzer.js ➔ ... ➔ ???   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 1
c 2
b 0
f 0
nc 1
nop 1
dl 0
loc 26
rs 8.8571
/**
 * Wappalyzer v5
 *
 * Created by Elbert Alias <[email protected]>
 *
 * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
 */
8
9
'use strict';
10
11
// Patterns used by Wappalyzer#trackDetectedApps to decide whether a detection
// may be recorded in the hostname cache.
const validation = {
  // An optional literal "www." prefix, then a name, a dot and a 2-6 letter
  // lower-case TLD, optionally preceded by a 2-3 letter label (e.g. "co.").
  // The dot after "www" is escaped so the prefix group only captures "www.".
  hostname: /(www\.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,

  // Matches local, development, staging, test, demo, admin, google and cache
  // labels followed by a dot, as well as "/admin" paths and ".local" names.
  hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/
};
15
16
/**
 * Core detection engine.
 *
 * The host environment supplies a `driver` object. This class calls
 * `driver.log`, `driver.displayApps`, `driver.getRobotsTxt` and
 * `driver.document.createElement`; `driver.ping` is optional and, when it is
 * a function, enables hostname/ad tracking.
 */
class Wappalyzer {
  constructor() {
    this.apps = {};
    this.categories = {};
    this.driver = {};

    this.detected = {};
    this.hostnameCache = {};
    this.adCache = [];

    this.config = {
      websiteURL: 'https://wappalyzer.com/',
      twitterURL: 'https://twitter.com/Wappalyzer',
      githubURL: 'https://github.com/AliasIO/Wappalyzer',
    };
  }

  /**
   * Log messages to console
   *
   * @param {string} message
   * @param {string} [source] defaults to ''
   * @param {string} [type]   defaults to 'debug'
   */
  log(message, source, type) {
    this.driver.log(message, source || '', type || 'debug');
  }

  /**
   * Run every known application's patterns against the supplied page data,
   * resolve excludes/implies, cache the result per URL and hand it to the
   * driver for display.
   *
   * @param {string} hostname
   * @param {string} url      the fragment ("#...") is stripped before use
   * @param {Object} data     may contain html, headers, env and robotsTxt;
   *                          data.url and data.html are normalised in place
   * @param {*}      context  passed through to driver.displayApps
   */
  analyze(hostname, url, data, context) {
    const apps = {};

    // Remove hash from URL
    data.url = url = url.split('#')[0];

    if ( typeof data.html !== 'string' ) {
      data.html = '';
    }

    if ( this.detected[url] === undefined ) {
      this.detected[url] = {};
    }

    Object.keys(this.apps).forEach(appName => {
      // Re-use the instance cached for this URL so earlier detections accumulate
      const app = this.detected[url][appName] || new Application(appName, this.apps[appName]);

      apps[appName] = app;

      if ( url ) {
        this.analyzeUrl(app, url);
      }

      if ( data.html ) {
        this.analyzeHtml(app, data.html);
        this.analyzeScript(app, data.html);
        this.analyzeMeta(app, data.html);
      }

      if ( data.headers ) {
        this.analyzeHeaders(app, data.headers);
      }

      if ( data.env ) {
        this.analyzeEnv(app, data.env);
      }

      if ( data.robotsTxt ) {
        this.analyzeRobotsTxt(app, data.robotsTxt);
      }
    });

    // Keep only applications that were detected with a non-zero confidence
    Object.keys(apps).forEach(appName => {
      const app = apps[appName];

      if ( !app.detected || !app.getConfidence() ) {
        delete apps[appName];
      }
    });

    this.resolveExcludes(apps);
    this.resolveImplies(apps, url);

    this.cacheDetectedApps(apps, url);
    this.trackDetectedApps(apps, url, hostname, data.html);

    const names = Object.keys(apps);

    if ( names.length ) {
      this.log(names.length + ' apps detected: ' + names.join(', ') + ' on ' + url, 'core');
    }

    this.driver.displayApps(this.detected[url], context);
  }

  /**
   * Cache detected ads
   */
  cacheDetectedAds(ad) {
    this.adCache.push(ad);
  }

  /**
   * Check the URL's path against the robots.txt rules returned by the driver.
   *
   * @param {string} url
   * @returns {Promise} resolves when no rule is a prefix of the path; rejects
   *   with no value when a rule matches, or with the driver's error when the
   *   robots.txt lookup itself fails
   */
  robotsTxtAllows(url) {
    return new Promise((resolve, reject) => {
      const parsed = this.parseUrl(url);

      this.driver.getRobotsTxt(parsed.host, parsed.protocol === 'https:')
        .then(robotsTxt => {
          const disallowed = robotsTxt.some(disallow => parsed.pathname.indexOf(disallow) === 0);

          if ( disallowed ) {
            reject();
          } else {
            resolve();
          }
        })
        // Forward lookup failures so the returned promise always settles
        .catch(reject);
    });
  }

  /**
   * Parse a URL
   *
   * @param {string} url
   * @returns {Object} the anchor element created through the driver's
   *   document, with an extra `canonical` property (protocol + host + path)
   */
  parseUrl(url) {
    const a = this.driver.document.createElement('a');

    a.href = url;

    a.canonical = a.protocol + '//' + a.host + a.pathname;

    return a;
  }

  /**
   * Extract the Disallow rules that apply to user agent "*" or "wappalyzer".
   *
   * @param {string} robotsTxt raw robots.txt contents
   * @returns {string[]} disallowed path prefixes, in file order
   */
  parseRobotsTxt(robotsTxt) {
    const disallow = [];
    let userAgent;

    // Accept both LF and CRLF line endings; "." does not match "\r", so a
    // trailing carriage return would otherwise prevent every line from matching
    robotsTxt.split(/\r?\n/).forEach(line => {
      const agent = /^User-agent:\s*(.+)$/i.exec(line);

      if ( agent ) {
        userAgent = agent[1].toLowerCase();

        return;
      }

      if ( userAgent === '*' || userAgent === 'wappalyzer' ) {
        const rule = /^Disallow:\s*(.+)$/i.exec(line);

        if ( rule ) {
          disallow.push(rule[1]);
        }
      }
    });

    return disallow;
  }

  /**
   * Send the hostname and ad caches to the driver once either holds at least
   * 50 entries, then reset both. Callers must make sure driver.ping exists.
   */
  ping() {
    if ( Object.keys(this.hostnameCache).length >= 50 || this.adCache.length >= 50 ) {
      this.driver.ping(this.hostnameCache, this.adCache);

      this.hostnameCache = {};
      this.adCache = [];
    }
  }

  /**
   * Enclose string in array
   *
   * Non-string values are returned unchanged.
   */
  asArray(value) {
    return typeof value === 'string' ? [ value ] : value;
  }

  /**
   * Parse apps.json patterns
   *
   * A pattern string has the form "regex\;key:value\;key:value". The first
   * segment becomes `string` and `regex` (case-insensitive); the remaining
   * segments become additional properties (e.g. version, confidence).
   *
   * @param {string|string[]|Object} patterns
   * @returns {Object[]|Object} an array of parsed patterns when given a
   *   string or array, otherwise an object mapping each key to such an array
   *   (an empty object when `patterns` is undefined or null)
   */
  parsePatterns(patterns) {
    let parsed = {};

    // Convert string to object containing array containing string
    if ( typeof patterns === 'string' || Array.isArray(patterns) ) {
      patterns = {
        main: this.asArray(patterns)
      };
    }

    // Object.keys only visits own properties, unlike for...in
    Object.keys(patterns || {}).forEach(key => {
      parsed[key] = [];

      this.asArray(patterns[key]).forEach(pattern => {
        const attrs = {};

        pattern.split('\\;').forEach((attr, i) => {
          if ( i ) {
            // Key value pairs
            const pair = attr.split(':');

            if ( pair.length > 1 ) {
              attrs[pair.shift()] = pair.join(':');
            }

            return;
          }

          attrs.string = attr;

          try {
            // The RegExp constructor needs no slash escaping
            attrs.regex = new RegExp(attr, 'i');
          } catch (e) {
            // NOTE(review): the empty RegExp matches any input, so an invalid
            // pattern will match everything - confirm this is intended
            attrs.regex = new RegExp();

            this.log(e + ': ' + attr, 'core', 'error');
          }
        });

        parsed[key].push(attrs);
      });
    });

    // Convert back to array if the original pattern list was an array (or string)
    if ( 'main' in parsed ) {
      parsed = parsed.main;
    }

    return parsed;
  }

  /**
   * Remove from `apps` every application named in the `excludes` property of
   * another application in `apps`. Mutates `apps`.
   */
  resolveExcludes(apps) {
    const excludes = [];

    // Exclude app in detected apps only
    Object.keys(apps).forEach(appName => {
      const app = apps[appName];

      if ( app.props.excludes ) {
        this.asArray(app.props.excludes).forEach(excluded => {
          excludes.push(excluded);
        });
      }
    });

    // Remove excluded applications
    Object.keys(apps).forEach(appName => {
      if ( excludes.indexOf(appName) !== -1 ) {
        delete apps[appName];
      }
    });
  }

  /**
   * Add the applications implied by the ones in `apps` and copy the implying
   * application's confidence entries onto them, scaled by the optional
   * "confidence" attribute of the implies entry. Mutates `apps`.
   */
  resolveImplies(apps, url) {
    let checkImplies = true;

    // Implied applications
    // Run several passes as implied apps may imply other apps
    while ( checkImplies ) {
      checkImplies = false;

      Object.keys(apps).forEach(appName => {
        const app = apps[appName];

        // The implies list lives on the apps.json definition (props)
        if ( !app || !app.props || !app.props.implies ) {
          return;
        }

        this.asArray(app.props.implies).forEach(entry => {
          const implied = this.parsePatterns(entry)[0];

          if ( !this.apps[implied.string] ) {
            this.log('Implied application ' + implied.string + ' does not exist', 'core', 'warn');

            return;
          }

          if ( !( implied.string in apps ) ) {
            const cached = this.detected[url] && this.detected[url][implied.string];

            // Give the implied app its own definition and mark it detected
            apps[implied.string] = cached || new Application(implied.string, this.apps[implied.string], true);

            checkImplies = true;
          }

          // Apply app confidence to implied app
          Object.keys(app.confidence).forEach(id => {
            apps[implied.string].confidence[id + ' implied by ' + appName] = app.confidence[id] * ( implied.confidence ? implied.confidence / 100 : 1 );
          });
        });
      });
    }
  }

  /**
   * Cache detected applications
   *
   * Stores every application under this.detected[url]; the caches are only
   * flushed when the driver provides a ping function.
   */
  cacheDetectedApps(apps, url) {
    if ( this.detected[url] === undefined ) {
      this.detected[url] = {};
    }

    Object.keys(apps).forEach(appName => {
      // Per URL
      this.detected[url][appName] = apps[appName];
    });

    if ( this.driver.ping instanceof Function ) {
      this.ping();
    }
  }

  /**
   * Track detected applications
   *
   * Does nothing unless the driver provides a ping function. Applications
   * detected with full confidence on an eligible hostname are counted in the
   * hostname cache once robots.txt allows the URL.
   */
  trackDetectedApps(apps, url, hostname, html) {
    if ( !( this.driver.ping instanceof Function ) ) {
      return;
    }

    Object.keys(apps).forEach(appName => {
      const app = apps[appName];

      if ( this.detected[url][appName].getConfidence() < 100 ) {
        return;
      }

      if ( !validation.hostname.test(hostname) || validation.hostnameBlacklist.test(url) ) {
        return;
      }

      this.robotsTxtAllows(url)
        .then(() => {
          if ( !( hostname in this.hostnameCache ) ) {
            this.hostnameCache[hostname] = {
              applications: {},
              meta: {}
            };
          }

          const applications = this.hostnameCache[hostname].applications;

          if ( !( appName in applications ) ) {
            applications[appName] = {
              hits: 0
            };
          }

          applications[appName].hits ++;

          if ( app.version ) {
            applications[appName].version = app.version;
          }
        })
        // robotsTxtAllows rejects without a value when the URL is disallowed
        .catch(error => this.log(error ? 'robots.txt check failed for ' + url + ': ' + error : 'Disallowed in robots.txt: ' + url, 'core'));
    });

    // Additional information
    // This runs before the robots.txt promises above settle, so the language
    // is only recorded for hostnames cached by an earlier call
    if ( hostname in this.hostnameCache ) {
      const match = html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);

      if ( match && match.length ) {
        this.hostnameCache[hostname].meta['language'] = match[1];
      }
    }

    this.ping();
  }

  /**
   * Analyze URL
   */
  analyzeUrl(app, url) {
    const patterns = this.parsePatterns(app.props.url);

    if ( !patterns.length ) {
      return;
    }

    patterns.forEach(pattern => {
      if ( pattern.regex.test(url) ) {
        this.addDetected(app, pattern, 'url', url);
      }
    });
  }

  /**
   * Analyze HTML
   */
  analyzeHtml(app, html) {
    const patterns = this.parsePatterns(app.props.html);

    if ( !patterns.length ) {
      return;
    }

    patterns.forEach(pattern => {
      if ( pattern.regex.test(html) ) {
        this.addDetected(app, pattern, 'html', html);
      }
    });
  }

  /**
   * Analyze script tag
   *
   * Tests every pattern against the src attribute of each script tag.
   */
  analyzeScript(app, html) {
    const patterns = this.parsePatterns(app.props.script);

    if ( !patterns.length ) {
      return;
    }

    // Collect the script sources once instead of re-scanning per pattern
    const regex = new RegExp('<script[^>]+src=("|\')([^"\']+)', 'ig');
    const sources = [];
    let match;

    while ( ( match = regex.exec(html) ) ) {
      sources.push(match[2]);
    }

    patterns.forEach(pattern => {
      sources.forEach(src => {
        if ( pattern.regex.test(src) ) {
          this.addDetected(app, pattern, 'script', src);
        }
      });
    });
  }

  /**
   * Analyze meta tag
   *
   * For each meta tag whose name/property attribute equals a pattern key, the
   * content attribute is tested against that key's patterns.
   */
  analyzeMeta(app, html) {
    const regex = /<meta[^>]+>/ig;
    const patterns = this.parsePatterns(app.props.meta);

    // Build the attribute matchers once, outside the tag loop
    const names = Object.keys(patterns).map(meta => ({
      meta,
      attribute: new RegExp('(name|property)=["\']' + meta + '["\']', 'i')
    }));

    if ( !names.length ) {
      return;
    }

    let match;

    while ( ( match = regex.exec(html) ) ) {
      const tag = match[0];

      names.forEach(name => {
        if ( !name.attribute.test(tag) ) {
          return;
        }

        const content = tag.match(/content=("|')([^"']+)("|')/i);

        patterns[name.meta].forEach(pattern => {
          if ( content && content.length === 4 && pattern.regex.test(content[2]) ) {
            this.addDetected(app, pattern, 'meta', content[2], name.meta);
          }
        });
      });
    }
  }

  /**
   * analyze response headers
   *
   * Pattern keys are lower-cased before being looked up in `headers`.
   */
  analyzeHeaders(app, headers) {
    const patterns = this.parsePatterns(app.props.headers);

    if ( !headers ) {
      return;
    }

    Object.keys(patterns).forEach(name => {
      const header = name.toLowerCase();

      patterns[name].forEach(pattern => {
        if ( header in headers && pattern.regex.test(headers[header]) ) {
          this.addDetected(app, pattern, 'headers', headers[header], header);
        }
      });
    });
  }

  /**
   * Analyze environment variables
   *
   * Every pattern is tested against every value in `envs`.
   */
  analyzeEnv(app, envs) {
    const patterns = this.parsePatterns(app.props.env);

    if ( !patterns.length ) {
      return;
    }

    patterns.forEach(pattern => {
      Object.keys(envs).forEach(env => {
        if ( pattern.regex.test(envs[env]) ) {
          this.addDetected(app, pattern, 'env', envs[env]);
        }
      });
    });
  }

  /**
   * Analyze robots.txt
   */
  analyzeRobotsTxt(app, robotsTxt) {
    const patterns = this.parsePatterns(app.props.robotsTxt);

    if ( !patterns.length ) {
      return;
    }

    patterns.forEach(pattern => {
      if ( pattern.regex.test(robotsTxt) ) {
        this.addDetected(app, pattern, 'robotsTxt', robotsTxt);
      }
    });
  }

  /**
   * Mark application as detected, set confidence and version
   *
   * @param {Object} app     application to update
   * @param {Object} pattern parsed pattern (see parsePatterns)
   * @param {string} type    source of the match, e.g. 'url' or 'headers'
   * @param {string} value   the text the pattern matched against
   * @param {string} [key]   header or meta name, when applicable
   */
  addDetected(app, pattern, type, value, key) {
    app.detected = true;

    // Set confidence level
    // Parsed pattern attributes are strings; store a number so confidences
    // can be summed rather than concatenated
    const confidence = pattern.confidence === undefined ? 100 : parseInt(pattern.confidence, 10);

    app.confidence[type + ' ' + ( key ? key + ' ' : '' ) + pattern.regex] = Number.isNaN(confidence) ? 100 : confidence;

    // Detect version number
    if ( !pattern.version ) {
      return;
    }

    const matches = pattern.regex.exec(value);

    if ( !matches ) {
      return;
    }

    let version = pattern.version;

    matches.forEach((match, i) => {
      // Parse ternary operator
      const ternary = new RegExp('\\\\' + i + '\\?([^:]+):(.*)$').exec(version);

      // Function replacers insert text literally, so "$" sequences in the
      // matched text are not treated as replacement patterns
      if ( ternary && ternary.length === 3 ) {
        version = version.replace(ternary[0], () => match ? ternary[1] : ternary[2]);
      }

      // Replace back references
      version = version.replace(new RegExp('\\\\' + i, 'g'), () => match || '');
    });

    if ( version ) {
      app.version = version;
    }
  }
}
533
534
/**
 * Application class
 *
 * Holds the detection state of a single application: its definition
 * (`props`), whether it was detected, the detected version and a map of
 * confidence values keyed by the pattern that produced them.
 */
class Application {
  /**
   * @param {string} name       application name
   * @param {Object} props      the application's definition
   * @param {*}      [detected] coerced to a boolean; defaults to false
   */
  constructor(name, props, detected) {
    this.confidence      = {};
    this.confidenceTotal = 0;
    this.detected        = Boolean(detected);
    this.excludes        = [];
    this.name            = name;
    this.props           = props;
    this.version         = '';
  }

  /**
   * Calculate confidence total
   *
   * Sums the own entries of `confidence`, capped at 100, and stores the
   * result in `confidenceTotal`.
   *
   * @returns {number}
   */
  getConfidence() {
    // Object.keys only visits own properties. Values are coerced to numbers
    // because string confidences would otherwise be concatenated.
    const total = Object.keys(this.confidence)
      .reduce((sum, id) => sum + ( Number(this.confidence[id]) || 0 ), 0);

    this.confidenceTotal = Math.min(total, 100);

    return this.confidenceTotal;
  }
}
561
562
// Export the class when a CommonJS-style `module` object is present;
// environments without one skip this statement.
if ( typeof module === 'object' ) {
  module.exports = Wappalyzer;
}
565